// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
// and modified by Doris

#pragma once

#include <vec/common/hex.h>
#include <vec/common/string_utils/string_utils.h>
#include <vec/core/types.h>

#include <algorithm>
#include <array>
#include <bit>
#include <cstdint>
#include <cstring>
#include <utility>

/// Sizes and limits shared by the IPv4/IPv6 formatting and parsing helpers in this header.
/// Binary lengths are in bytes, text lengths are in characters.
constexpr size_t IPV4_BINARY_LENGTH = 4;
/// Longest dotted-quad text: four 3-digit octets plus three '.' separators.
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
/// Longest fully expanded IPv6 text: eight 4-digit hex groups plus seven ':' separators.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
constexpr size_t IPV4_MIN_NUM_VALUE = 0;          //num value of '0.0.0.0'
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; //num value of '255.255.255.255'
constexpr int IPV4_MAX_OCTET_VALUE = 255;         //max value of octet
/// Number of bits each IPv4 octet occupies inside the packed 32-bit value.
constexpr size_t IPV4_OCTET_BITS = 8;
/// Radix used when accumulating the decimal digits of an IPv4 octet.
constexpr size_t DECIMAL_BASE = 10;
constexpr size_t IPV6_BINARY_LENGTH = 16;

namespace doris::vectorized {

/// Lookup table indexed by a byte value (0..255). format_ipv4 copies `second` characters
/// starting at `first` for every octet it prints, i.e. each entry is (text, text length);
/// presumably the text is the decimal spelling of the index - confirm against the definition.
/// Only declared here; the definition is expected in another translation unit.
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;

/** Format a binary IPv4 value as dotted-quad text: 'aaa.bbb.ccc.ddd'.
  *
  * The bytes of src are interpreted as a host-endian integer, most significant octet
  * printed first: on little-endian hosts src is read from its last byte to its first,
  * on big-endian hosts in order. That is, a native 0x7f000001 => "127.0.0.1".
  *
  * Any number of the tail octets can be replaced with the given mask string.
  *
  * No terminating zero byte is written; on return dst points right after the last
  * character produced.
  *
  * @param src              - binary address, at least min(src_size, IPV4_BINARY_LENGTH) bytes.
  * @param src_size         - number of meaningful bytes in src. When it is smaller than
  *                           IPV4_BINARY_LENGTH the missing leading octets are printed as "0".
  *                           Values above IPV4_BINARY_LENGTH are treated as IPV4_BINARY_LENGTH.
  * @param dst              - output cursor (advanced by the call). The buffer must hold
  *                           IPV4_MAX_TEXT_LENGTH characters, or more if mask_string is
  *                           longer than 3 characters.
  * @param mask_tail_octets - how many trailing octets are replaced by mask_string. Values
  *                           above IPV4_BINARY_LENGTH are treated as IPV4_BINARY_LENGTH.
  * @param mask_string      - text written in place of every masked octet; nullptr is
  *                           treated as an empty string.
  *
  * Examples:
  *     format_ipv4(&0x7f000001, 4, dst, mask_tail_octets = 0, nullptr);
  *         > dst == "127.0.0.1"
  *     format_ipv4(&0x7f000001, 4, dst, mask_tail_octets = 1, "xxx");
  *         > dst == "127.0.0.xxx"
  *     format_ipv4(&0x7f000001, 4, dst, mask_tail_octets = 1, "0");
  *         > dst == "127.0.0.0"
  */
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;

    // Clamp both counts before subtracting: the arithmetic is unsigned, so an out-of-range
    // argument would otherwise wrap around and make the function emit more than four
    // components (overrunning dst).
    const size_t masked_octets = std::min<size_t>(mask_tail_octets, IPV4_BINARY_LENGTH);
    const size_t available_octets = std::min(src_size, IPV4_BINARY_LENGTH);

    /// Index of the first masked octet; everything before it is printed for real.
    const size_t limit = IPV4_BINARY_LENGTH - masked_octets;
    /// Index of the first octet that is actually backed by a byte of src.
    const size_t first_real_octet = IPV4_BINARY_LENGTH - available_octets;

    /// Leading octets that src does not provide are printed as zero.
    const size_t padding = std::min(first_real_octet, limit);
    for (size_t octet = 0; octet < padding; ++octet) {
        *dst++ = '0';
        *dst++ = '.';
    }

    for (size_t octet = first_real_octet; octet < limit; ++octet) {
        uint8_t value = 0;
        if constexpr (std::endian::native == std::endian::little)
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
        else
            value = static_cast<uint8_t>(src[octet]);
        const uint8_t len = one_byte_to_string_lookup_table[value].second;
        const char* str = one_byte_to_string_lookup_table[value].first;

        memcpy(dst, str, len);
        dst += len;

        *dst++ = '.';
    }

    for (size_t mask = 0; mask < masked_octets; ++mask) {
        if (mask_length != 0) { /// never hand a null mask_string to memcpy
            memcpy(dst, mask_string, mask_length);
            dst += mask_length;
        }

        *dst++ = '.';
    }

    /// Exactly four components were written, each followed by '.'; drop the last separator.
    dst--;
}

inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
                        const char* mask_string = "xxx") {
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
}

/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
 *
 * Parses the input string `src` and stores the binary host-endian value into the buffer pointed
 * to by `dst`. Exactly IPV4_BINARY_LENGTH bytes are written.
 * That is "127.0.0.1" becomes 0x7f000001.
 *
 * In case of failure doesn't modify buffer pointed by `dst` (but `src` may already have been
 * advanced over the part of the input that was consumed).
 *
 * Each octet is read from up to four consecutive decimal digits and must not exceed
 * IPV4_MAX_OCTET_VALUE; leading zeroes are therefore accepted. Octets are separated by a single '.'.
 *
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
 *           To parse strings use overloads below.
 *
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
 *                      On success it is left right after the last digit of the address.
 * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
 * @param dst         - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH long.
 * @param first_octet - preparsed first octet; a negative value means "not preparsed", in which case all
 *                      four octets are read from src. Otherwise only the remaining three are read.
 * @return            - true if parsed successfully, false otherwise.
 */
template <typename T, typename EOFfunction>
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) {
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
        return false;
    }

    /// Accumulated in 64 bits so that shifting an octet into the top byte never overflows.
    int64_t result = 0;
    /// Bit position of the octet that is parsed next, starting with the most significant one.
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
    if (first_octet >= 0) {
        result |= first_octet << offset;
        offset -= IPV4_OCTET_BITS;
    }

    /// The increment step also skips the '.' that was verified at the end of the previous round.
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
        if (eof()) {
            return false;
        }

        int64_t value = 0;
        size_t len = 0;
        while (is_numeric_ascii(*src) && len <= 3) {
            value = value * DECIMAL_BASE + (*src - '0');
            ++len;
            ++src;
            if (eof()) {
                break;
            }
        }
        /// An octet needs at least one digit, must fit into a byte and, unless it is the
        /// last one, has to be followed by '.'.
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
            return false;
        }
        result |= value << offset;

        if (offset == 0) {
            break;
        }
    }

    /// Store exactly IPV4_BINARY_LENGTH bytes. Copying the 64-bit accumulator itself would
    /// write 8 bytes into a buffer that callers size for an IPv4 address (e.g. the last
    /// four bytes of an IPv6 buffer) and, on big-endian hosts, would store the zero half first.
    const auto packed = static_cast<uint32_t>(result);
    static_assert(sizeof(packed) == IPV4_BINARY_LENGTH, "IPv4 address must occupy 4 bytes");
    memcpy(dst, &packed, sizeof(packed));
    return true;
}

/// returns pointer to the right after parsed sequence or null on failed parsing
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
    if (parse_ipv4(
                src, [&src, end]() { return src == end; }, dst)) {
        return src;
    }
    return nullptr;
}

/// returns true if whole buffer was parsed successfully
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
    return parse_ipv4(src, end, dst) == end;
}

/// returns pointer to the right after parsed sequence or null on failed parsing
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
    if (parse_ipv4(
                src, []() { return false; }, dst)) {
        return src;
    }
    return nullptr;
}

/// Returns true if the whole null-terminated string was parsed successfully as an IPv4 address,
/// i.e. parsing succeeded and stopped on the terminating zero byte.
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
    const char* stop = parse_ipv4(src, dst);
    if (stop == nullptr) {
        return false;
    }
    return *stop == '\0';
}

/// Integer logarithm: returns ceil(log(value, base)), the smallest integer greater than or
/// equal to log(value, base).
/// `carry` tells whether a non-zero remainder has already been discarded, which forces the
/// result to be rounded up; pass false for a plain logarithm.
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
    UInt32 remaining = value;
    UInt32 whole_steps = 0;
    bool round_up = carry;

    /// Strip one base-digit per step, remembering whether anything non-zero was dropped.
    while (remaining >= base) {
        round_up = (remaining % base) || round_up;
        remaining /= base;
        ++whole_steps;
    }

    /// What is left is a single digit: 0 and 1 add nothing unless a remainder was dropped.
    const bool extra_step = remaining % base > 1 || round_up;
    return whole_steps + extra_step;
}

/// Writes `value` in the given base at `out` and advances `out` past the written digits.
/// Faster than sprintf. No terminating zero byte is written, and nothing is written for a
/// negative value.
/// Digits are produced by hex_digit_lowercase, so presumably base is expected to be at most 16.
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
/// But it doesn't matter here.
template <UInt32 base, typename T>
inline void print_integer(char*& out, T value) {
    if (value == 0) {
        *out++ = '0';
        return;
    }

    /// Enough room for every digit: digits needed per byte times the number of bytes in T.
    constexpr size_t max_digits = sizeof(T) * int_log(256, base, false);
    char digits[max_digits];
    size_t count = 0;

    /// Digits come out least significant first...
    for (; value > 0; value /= base) {
        digits[count++] = hex_digit_lowercase(value % base);
    }

    /// ...so emit them in reverse order.
    while (count > 0) {
        *out++ = digits[--count];
    }
}

/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
  * bounds checking, unnecessary string copying and length calculation.
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
  * @param dst         - where to put format result bytes
  * @param zeroed_tail_bytes_count - the parameter is currently not being used
  */
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
    struct {
        Int64 base, len;
    } best {-1, 0}, cur {-1, 0};
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};

    // the current function logic is processed in big endian manner
    // but ipv6 in doris is stored in little-endian byte order
    // so transfer to big-endian byte order first
    // compatible with parse_ipv6 function in format_ip.h
    std::reverse(src, src + IPV6_BINARY_LENGTH);

    /** Preprocess:
        *    Copy the input (bytewise) array into a wordwise array.
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
        words[i / 2] = (src[i] << 8) | src[i + 1];
    }

    for (size_t i = 0; i < words.size(); i++) {
        if (words[i] == 0) {
            if (cur.base == -1) {
                cur.base = i;
                cur.len = 1;
            } else {
                cur.len++;
            }
        } else {
            if (cur.base != -1) {
                if (best.base == -1 || cur.len > best.len) {
                    best = cur;
                }
                cur.base = -1;
            }
        }
    }

    if (cur.base != -1) {
        if (best.base == -1 || cur.len > best.len) {
            best = cur;
        }
    }
    if (best.base != -1 && best.len < 2) {
        best.base = -1;
    }

    /// Format the result.
    for (size_t i = 0; i < words.size(); i++) {
        /// Are we inside the best run of 0x00's?
        if (best.base != -1) {
            auto best_base = static_cast<size_t>(best.base);
            if (i >= best_base && i < (best_base + best.len)) {
                if (i == best_base) {
                    *dst++ = ':';
                }
                continue;
            }
        }
        /// Are we following an initial run of 0x00s or any real hex?
        if (i != 0) {
            *dst++ = ':';
        }
        /// Is this address an encapsulated IPv4?
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
            std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
#endif
            format_ipv4(ipv4_buffer, dst,
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
                        "0");
            // format_ipv4 has already added a null-terminator for us.
            return;
        }
        print_integer<16>(dst, words[i]);
    }

    /// Was it a trailing run of 0x00's?
    if (best.base != -1 &&
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
        *dst++ = ':';
    }
}

/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
*
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
*
* Accepted input is a sequence of up to eight groups of 1-4 hex digits separated by ':', with at most
* one "::" standing for a run of all-zero groups, optionally ending with a dotted-quad IPv4 address
* that fills the last two groups. Parsing stops at the first character that cannot continue the
* address; on success `src` is left at that position.
*
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
*           To parse strings use overloads below.
*
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
* @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
* @param first_block - preparsed first block; a negative value means "not preparsed"
* @return            - true if parsed successfully, false otherwise.
*/
template <typename T, typename EOFfunction>
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
    /// Failure path: wipe the output and report false, so it can be used as `return clear_dst();`
    const auto clear_dst = [dst]() {
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
        return false;
    };

    if (src == nullptr || eof()) return clear_dst();

    int groups = 0;            /// number of parsed groups
    unsigned char* iter = dst; /// iterator over dst buffer
    unsigned char* zptr =
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started

    /// Groups are written in big-endian order while parsing; bytes never written stay zero.
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);

    if (first_block >= 0) {
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
        /// A ':' right at the current position is taken as the start of an all-zeroes block.
        if (*src == ':') {
            zptr = iter;
            ++src;
        }
        ++groups;
    }

    /// true while a new group may start at the current position (at the very beginning and after ':')
    bool group_start = true;

    while (!eof() && groups < 8) {
        if (*src == ':') {
            ++src;
            if (eof()) /// trailing colon is not allowed
                return clear_dst();

            group_start = true;

            if (*src == ':') {
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
                    return clear_dst();
                zptr = iter;
                ++src;
                continue;
            }
            if (groups == 0) /// leading colon is not allowed
                return clear_dst();
        }

        /// mixed IPv4 parsing
        if (*src == '.') {
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
                return clear_dst();

            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
                return clear_dst();

            ++src;
            if (eof()) return clear_dst();

            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
            --groups;
            iter -= 2;

            /// Each hex nibble of the stored group is read back as one decimal digit;
            /// a nibble above 9 means the text contained a letter and cannot be an octet.
            UInt16 num = 0;
            for (int i = 0; i < 2; ++i) {
                unsigned char first = (iter[i] >> 4) & 0x0fu;
                unsigned char second = iter[i] & 0x0fu;
                if (first > 9 || second > 9) return clear_dst();
                (num *= 100) += first * 10 + second;
            }
            if (num > 255) return clear_dst();

            /// parse IPv4 with known first octet
            /// At most seven groups were parsed before the '.', so iter can be as far as
            /// dst + 12 here: parse_ipv4 must store no more than IPV4_BINARY_LENGTH bytes.
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();

            /// parse_ipv4 stores a host-endian value, while dst is still big-endian at this point
            if constexpr (std::endian::native == std::endian::little)
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);

            iter += 4;
            groups += 2;
            break; /// IPv4 block is the last - end of parsing
        }

        if (!group_start) /// end of parsing
            break;
        group_start = false;

        UInt16 val = 0;  /// current decoded group
        int xdigits = 0; /// number of decoded hex digits in current group

        /// unhex presumably yields 0xFF for a character that is not a hex digit - confirm against its definition
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
            UInt8 num = unhex(*src);
            if (num == 0xFF) break;
            (val <<= 4) |= num;
        }

        if (xdigits == 0) /// end of parsing
            break;

        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(val & 0xffu);
        ++groups;
    }

    /// either all 8 groups or all-zeroes block should be present
    if (groups < 8 && zptr == nullptr) return clear_dst();

    /// process all-zeroes block
    /// Everything parsed after "::" is shifted to the end of the buffer and the gap is zero-filled.
    if (zptr != nullptr) {
        size_t msize = iter - zptr;
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
    }

    /// the current function logic is processed in big endian manner
    /// but ipv6 in doris is stored in little-endian byte order
    /// so transfer to little-endian
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);

    return true;
}

/// returns pointer to the right after parsed sequence or null on failed parsing
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
    if (parse_ipv6(
                src, [&src, end]() { return src == end; }, dst))
        return src;
    return nullptr;
}

/// returns true if whole buffer was parsed successfully
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
    return parse_ipv6(src, end, dst) == end;
}

/// returns pointer to the right after parsed sequence or null on failed parsing
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
    if (parse_ipv6(
                src, []() { return false; }, dst))
        return src;
    return nullptr;
}

/// Returns true if the whole null-terminated string was parsed successfully as an IPv6 address,
/// i.e. parsing succeeded and stopped on the terminating zero byte.
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
    const char* stop = parse_ipv6(src, dst);
    if (stop == nullptr) {
        return false;
    }
    return *stop == '\0';
}

} // namespace doris::vectorized
